Last Update: 2019-05-01 16:02:25
Let’s load some libraries first.
library(baseballr)
library(pitchRx)
library(tidyverse)
Let’s also clean out the environment.
# Remove every object that ls() lists in the current environment so the
# analysis starts without leftover variables.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the caller's workspace if this script is source()d into an interactive
# session; restarting R is the more reliable way to get a clean slate.
rm(list = ls())
With these libraries, we can get our data as well as visualize it. Let’s take a look at some players to see what we can find.
Here is the list of players I will be looking at: Syndergaard, Corbin, Vazquez, Stroman, Verlander, and Treinen.
Let’s now scrape the data for each player.
# Download pitch-level data for one pitcher via scrape_statcast_savant().
#
# Args:
#   start: first date of the query window, forwarded as `start_date`.
#   id:    player id, forwarded as `playerid`.
#   end:   last date of the query window, forwarded as `end_date`. Defaults to
#          "2019-04-22", the value that used to be hard-coded, so existing
#          two-argument calls behave exactly as before.
#
# Returns:
#   Whatever scrape_statcast_savant() returns for a `player_type` of
#   "pitcher".
scrape.data = function(start, id, end = "2019-04-22") {
  scrape_statcast_savant(start_date = start,
                         end_date = end,
                         playerid = id,
                         player_type = "pitcher")
}
# Start of the query window shared by every download below.
start = "2019-01-01"
# One scrape.data() call per pitcher; the numeric argument is the player id
# handed to the scraper.
# NOTE(review): the ids are presumably the players matching each variable
# name -- confirm against the data source.
syndergaard.data = scrape.data(start, 592789)
corbin.data = scrape.data(start, 571578)
vazquez.data = scrape.data(start, 553878)
stroman.data = scrape.data(start, 573186)
verlander.data = scrape.data(start, 434378)
treinen.data = scrape.data(start, 595014)
Now with our data, let’s get the information we want out of it.
# Reduce a raw pitch table to the columns used by the charts in this report,
# renaming them to short names.
#
# Args:
#   data: data frame (or tibble) containing at least the source columns
#         listed in `columns` below.
#
# Returns:
#   A base data.frame with one row per row of `data`, default row names, and
#   the columns name, pitch, outcome, date, event, descrip, xcoord, ycoord,
#   xmove, ymove, velo, spin, exvelo and exang (in that order). Column values
#   are carried over unchanged.
#
# Raises:
#   An error naming the missing source column(s) when `data` lacks any of
#   them.
filter.data = function(data) {
  # new name = source column; the order here is the output column order.
  columns = c(
    name    = "player_name",
    pitch   = "pitch_type",
    outcome = "type",
    date    = "game_date",
    event   = "events",
    descrip = "description",
    xcoord  = "plate_x",
    ycoord  = "plate_z",
    xmove   = "pfx_x",
    ymove   = "pfx_z",
    velo    = "effective_speed",
    spin    = "release_spin_rate",
    exvelo  = "launch_speed",
    exang   = "launch_angle"
  )

  # Fail early with a readable message instead of an error from deep inside
  # the column extraction.
  absent = setdiff(columns, names(data))
  if (length(absent) > 0) {
    stop("filter.data: missing column(s): ",
         paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # as.data.frame() so the result is a plain data.frame even for tibble input;
  # drop = FALSE keeps the data.frame shape in every case.
  filtered = as.data.frame(data)[, unname(columns), drop = FALSE]
  names(filtered) = names(columns)
  rownames(filtered) = NULL
  filtered
}
# Reduce each raw download to the plotting columns with filter.data(); the
# resulting per-pitcher data frames are what every chart function below takes.
syndergaard = filter.data(syndergaard.data)
corbin = filter.data(corbin.data)
stroman = filter.data(stroman.data)
treinen = filter.data(treinen.data)
vazquez = filter.data(vazquez.data)
verlander = filter.data(verlander.data)
With this filtered data, we have selected the following columns:
Let’s start visualizing some of this data. Before that, let me define a strike zone. This strike zone was taken from the website Baseball with R.
# Edges of the strike-zone rectangle, expressed in the same coordinates as the
# xcoord / ycoord columns it is drawn over in the location charts.
inKzone = -0.95
outKzone = 0.95
botKzone = 1.6
topKzone = 3.5

# Corner points in drawing order (bottom-left, top-left, top-right,
# bottom-right); the first corner is repeated at the end so that a path drawn
# through the rows closes the rectangle.
kZone = data.frame(
  x = c(inKzone, outKzone)[c(1, 1, 2, 2, 1)],
  y = c(botKzone, topKzone)[c(1, 2, 2, 1, 1)]
)
Let’s look at pitch location via pitch type.
# Scatter chart of pitch locations coloured by pitch type, with the strike
# zone outline drawn on top.
#
# Args:
#   player: data frame with the columns xcoord, ycoord, pitch and name. The
#           first value of `name` is used in the chart title.
#
# Returns:
#   A ggplot object. The strike-zone outline is read from the global `kZone`.
graph.pitch.heatmap.type = function(player) {
  # Columns are referenced by bare name so aes() resolves them against the
  # layer data; ggplot2 discourages `player$col` inside aes() because it
  # ignores whatever data the layer is actually drawn from.
  ggplot(player) +
    geom_jitter(aes(x = xcoord, y = ycoord, color = pitch)) +
    xlab("Horizontal Position") +
    ylab("Vertical Position") +
    ggtitle(paste(player$name[1], "Heatmap", sep = " ")) +
    labs(color = "Pitch Type") +
    theme_minimal() +
    geom_path(aes(x, y), data = kZone)
}
# Build the pitch-type location chart for each pitcher. Evaluating the bare
# variable on the following line prints the plot at top level; the
# `## Warning` lines are the messages recorded when the plot was drawn.
corbin.heatmap.type = graph.pitch.heatmap.type(corbin)
corbin.heatmap.type
stroman.heatmap.type = graph.pitch.heatmap.type(stroman)
stroman.heatmap.type
syndergaard.heatmap.type = graph.pitch.heatmap.type(syndergaard)
syndergaard.heatmap.type
## Warning: Removed 1 rows containing missing values (geom_point).
treinen.heatmap.type = graph.pitch.heatmap.type(treinen)
treinen.heatmap.type
## Warning: Removed 23 rows containing missing values (geom_point).
vazquez.heatmap.type = graph.pitch.heatmap.type(vazquez)
vazquez.heatmap.type
verlander.heatmap.type = graph.pitch.heatmap.type(verlander)
verlander.heatmap.type
Let’s look at pitch location via velocity.
# Scatter chart of pitch locations coloured by velocity on a blue-to-red
# gradient, with the strike zone outline drawn on top.
#
# Args:
#   player.data: data frame with the columns xcoord, ycoord, velo and name.
#                The first value of `name` is used in the chart title.
#
# Returns:
#   A ggplot object. The strike-zone outline is read from the global `kZone`.
graph.pitch.heatmap.velo = function(player.data) {
  # Bare column names let aes() resolve against the layer data; ggplot2
  # discourages `player.data$col` inside aes().
  ggplot(player.data) +
    geom_jitter(aes(x = xcoord, y = ycoord, color = velo)) +
    xlab("Horizontal Position") +
    ylab("Vertical Position") +
    ggtitle(paste(player.data$name[1], "Heatmap", sep = " ")) +
    labs(color = "Velocity") +
    scale_color_gradient(low = "blue", high = "red") +
    theme_minimal() +
    geom_path(aes(x, y), data = kZone)
}
# Build the velocity-coloured location chart for each pitcher. Evaluating the
# bare variable on the following line prints the plot at top level; the
# `## Warning` lines are the messages recorded when the plot was drawn.
corbin.heatmap.velo = graph.pitch.heatmap.velo(corbin)
corbin.heatmap.velo
stroman.heatmap.velo = graph.pitch.heatmap.velo(stroman)
stroman.heatmap.velo
syndergaard.heatmap.velo = graph.pitch.heatmap.velo(syndergaard)
syndergaard.heatmap.velo
## Warning: Removed 1 rows containing missing values (geom_point).
treinen.heatmap.velo = graph.pitch.heatmap.velo(treinen)
treinen.heatmap.velo
## Warning: Removed 23 rows containing missing values (geom_point).
vazquez.heatmap.velo = graph.pitch.heatmap.velo(vazquez)
vazquez.heatmap.velo
verlander.heatmap.velo = graph.pitch.heatmap.velo(verlander)
verlander.heatmap.velo
To view the movement, let’s look at the typical movement for each type of pitch that each player throws, summarized as box plots. First, let’s make a few helper functions.
# Box plot of horizontal movement per pitch type, drawn with the pitch types
# along the vertical axis (coord_flip()).
#
# Args:
#   player: data frame with the columns pitch, xmove and name. The first
#           value of `name` is used in the chart title.
#
# Returns:
#   A ggplot object. It is returned visibly, consistent with the location
#   chart functions, so a bare call at top level prints the chart.
graph.pitch.xmovement = function(player) {
  # Bare column names let aes() resolve against the layer data; ggplot2
  # discourages `player$col` inside aes().
  ggplot(player) +
    geom_boxplot(aes(x = pitch, y = xmove, color = pitch)) +
    coord_flip() +
    labs(color = "Pitch Type") +
    xlab("Pitch Type") +
    ylab("Horizontal Movement") +
    ggtitle(paste(player$name[1], "Horizontal Movement", sep = " ")) +
    theme_minimal()
}
# Box plot of vertical movement per pitch type.
#
# Args:
#   player: data frame with the columns pitch, ymove and name. The first
#           value of `name` is used in the chart title.
#
# Returns:
#   A ggplot object. It is returned visibly, consistent with the location
#   chart functions, so a bare call at top level prints the chart.
graph.pitch.ymovement = function(player) {
  # Bare column names let aes() resolve against the layer data; ggplot2
  # discourages `player$col` inside aes().
  ggplot(player) +
    geom_boxplot(aes(x = pitch, y = ymove, color = pitch)) +
    labs(color = "Pitch Type") +
    xlab("Pitch Type") +
    ylab("Vertical Movement") +
    ggtitle(paste(player$name[1], "Vertical Movement", sep = " ")) +
    theme_minimal()
}
# Build the horizontal and vertical movement box plots for each pitcher, then
# print both. Evaluating a bare variable prints the plot at top level; the
# `## Warning` lines are the messages recorded when the plot was drawn.
corbin.xmove = graph.pitch.xmovement(corbin)
corbin.ymove = graph.pitch.ymovement(corbin)
corbin.xmove
corbin.ymove
stroman.xmove = graph.pitch.xmovement(stroman)
stroman.ymove = graph.pitch.ymovement(stroman)
stroman.xmove
stroman.ymove
syndergaard.xmove = graph.pitch.xmovement(syndergaard)
syndergaard.ymove = graph.pitch.ymovement(syndergaard)
syndergaard.xmove
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
syndergaard.ymove
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
treinen.xmove = graph.pitch.xmovement(treinen)
treinen.ymove = graph.pitch.ymovement(treinen)
treinen.xmove
## Warning: Removed 23 rows containing non-finite values (stat_boxplot).
treinen.ymove
## Warning: Removed 23 rows containing non-finite values (stat_boxplot).
vazquez.xmove = graph.pitch.xmovement(vazquez)
vazquez.ymove = graph.pitch.ymovement(vazquez)
vazquez.xmove
vazquez.ymove
verlander.xmove = graph.pitch.xmovement(verlander)
verlander.ymove = graph.pitch.ymovement(verlander)
verlander.xmove
verlander.ymove
We first need to separate the pitches by type. Then we can see how each pitch type’s velocity changed over time.
# Line chart of velocity against row position in `player`, one coloured line
# per pitch type.
#
# Args:
#   player: data frame with the columns velo, pitch and name. The first value
#           of `name` is used in the chart title. Rows are plotted in the
#           order they appear.
#           NOTE(review): whether row order is oldest-first or newest-first
#           depends on the data source -- confirm before reading the x axis
#           as elapsed time.
#
# Returns:
#   A ggplot object. It is returned visibly, consistent with the location
#   chart functions, so a bare call at top level prints the chart.
graph.pitch.velo = function(player) {
  # seq_along() instead of 1:length(): for an empty data frame 1:0 would
  # yield c(1, 0), which does not match the zero rows being plotted.
  # Bare column names let aes() resolve against the layer data; ggplot2
  # discourages `player$col` inside aes().
  ggplot(player) +
    geom_line(aes(x = seq_along(velo), y = velo, color = pitch)) +
    xlab("Pitches Thrown") +
    ylab("Velocity") +
    labs(color = "Pitch Type") +
    ggtitle(paste(player$name[1], "Pitch Velocity Chart", sep = " ")) +
    theme_minimal()
}
# Build the velocity chart for each pitcher. Evaluating the bare variable on
# the following line prints the plot at top level; the `## Warning` lines are
# the messages recorded when the plot was drawn.
corbin.velo = graph.pitch.velo(corbin)
corbin.velo
stroman.velo = graph.pitch.velo(stroman)
stroman.velo
syndergaard.velo = graph.pitch.velo(syndergaard)
syndergaard.velo
## Warning: Removed 1 rows containing missing values (geom_path).
treinen.velo = graph.pitch.velo(treinen)
treinen.velo
## Warning: Removed 23 rows containing missing values (geom_path).
vazquez.velo = graph.pitch.velo(vazquez)
vazquez.velo
verlander.velo = graph.pitch.velo(verlander)
verlander.velo
# Step chart of spin rate against row position in `player`, one coloured line
# per pitch type.
#
# Args:
#   player: data frame with the columns spin, pitch and name. The first value
#           of `name` is used in the chart title. Rows are plotted in the
#           order they appear.
#
# Returns:
#   A ggplot object. It is returned visibly, consistent with the location
#   chart functions, so a bare call at top level prints the chart.
graph.pitch.spin = function(player) {
  # seq_along() instead of 1:length(): for an empty data frame 1:0 would
  # yield c(1, 0), which does not match the zero rows being plotted.
  # Bare column names let aes() resolve against the layer data; ggplot2
  # discourages `player$col` inside aes().
  ggplot(player) +
    geom_step(aes(x = seq_along(spin), y = spin, color = pitch),
              direction = "vh") +
    xlab("Pitches Thrown") +
    ylab("Spin Rate") +
    labs(color = "Pitch Type") +
    ggtitle(paste(player$name[1], "Pitch Spin Rate Chart", sep = " ")) +
    theme_minimal()
}
# Build the spin-rate chart for each pitcher. Evaluating the bare variable on
# the following line prints the plot at top level; the `## Warning` lines are
# the messages recorded when the plot was drawn.
corbin.spin = graph.pitch.spin(corbin)
corbin.spin
stroman.spin = graph.pitch.spin(stroman)
stroman.spin
syndergaard.spin = graph.pitch.spin(syndergaard)
syndergaard.spin
## Warning: Removed 1 rows containing missing values (geom_path).
treinen.spin = graph.pitch.spin(treinen)
treinen.spin
## Warning: Removed 23 rows containing missing values (geom_path).
vazquez.spin = graph.pitch.spin(vazquez)
vazquez.spin
verlander.spin = graph.pitch.spin(verlander)
verlander.spin
I’ll be looking at a few specific Pittsburgh Pirates pitchers and comparing them from year to year.